In [2]:
import pandas as pd
import numpy as np
%matplotlib inline
In [3]:
# Read the input CSV file into a pandas DataFrame.
# The Windows path is written as a raw string so every backslash is taken
# literally: sequences such as "\B", "\F", "\d" and "\p" are not valid escape
# sequences, and recent Python versions emit a warning when they appear in an
# ordinary string literal. The resulting path is unchanged.
df = pd.read_csv(r"D:\Box Sync\Fall_2016\data_vis\project\data\data.csv")
In [4]:
# Preview the first five rows of df (five is the default row count of head()).
df.head()
Out[4]:
In [7]:
# Discard every row that contains at least one missing value.
# inplace=True modifies df itself instead of returning a new frame.
df.dropna(axis="index", how="any", inplace=True)
In [11]:
# Names of the columns to be removed from df.
remove_columns = [
    'game_event_id',
    'game_id',
    'lat',
    'lon',
    'team_id',
    'game_date',
    'shot_id',
]
# Drop those columns from df in place (a missing name raises KeyError),
# then display the first rows of the reduced frame.
df.drop(remove_columns, axis='columns', inplace=True)
df.head()
In [12]:
# Write the processed DataFrame to a CSV file.
# index=False leaves the DataFrame's row index out of the output file.
# The Windows path is a raw string so the backslashes are taken literally
# rather than relying on invalid escape sequences ("\B", "\F", "\d", "\p")
# being passed through unchanged; the resulting path is the same.
df.to_csv(r"D:\Box Sync\Fall_2016\data_vis\project\data\processed_data.csv", index=False)
In [ ]: